sync w/ head.
#define QUEUE_SIZE 2048
#define pte_offset_kernel pte_offset
#else
+#ifdef CONFIG_SMP
+#define QUEUE_SIZE 1
+#else
#define QUEUE_SIZE 128
#endif
+#endif
-static mmu_update_t update_queue[QUEUE_SIZE];
-unsigned int mmu_update_queue_idx = 0;
-#define idx mmu_update_queue_idx
+DEFINE_PER_CPU(mmu_update_t, update_queue[QUEUE_SIZE]);
+DEFINE_PER_CPU(unsigned int, mmu_update_queue_idx);
- #if MMU_UPDATE_DEBUG > 0
- page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}};
- #undef queue_l1_entry_update
- #undef queue_l2_entry_update
- #endif
- #if MMU_UPDATE_DEBUG > 3
- static void DEBUG_allow_pt_reads(void)
- {
- int cpu = smp_processor_id();
- int idx = per_cpu(mmu_update_queue_idx, cpu);
- pte_t *pte;
- mmu_update_t update;
- int i;
- for ( i = idx-1; i >= 0; i-- )
- {
- pte = update_debug_queue[i].ptep;
- if ( pte == NULL ) continue;
- update_debug_queue[i].ptep = NULL;
- update.ptr = virt_to_machine(pte);
- update.val = update_debug_queue[i].pteval;
- HYPERVISOR_mmu_update(&update, 1, NULL);
- }
- }
- static void DEBUG_disallow_pt_read(unsigned long va)
- {
- int cpu = smp_processor_id();
- int idx = per_cpu(mmu_update_queue_idx, cpu);
- pte_t *pte;
- pmd_t *pmd;
- pgd_t *pgd;
- unsigned long pteval;
- /*
- * We may fault because of an already outstanding update.
- * That's okay -- it'll get fixed up in the fault handler.
- */
- mmu_update_t update;
- pgd = pgd_offset_k(va);
- pmd = pmd_offset(pgd, va);
- pte = pte_offset_kernel(pmd, va); /* XXXcl */
- update.ptr = virt_to_machine(pte);
- pteval = *(unsigned long *)pte;
- update.val = pteval & ~_PAGE_PRESENT;
- HYPERVISOR_mmu_update(&update, 1, NULL);
- update_debug_queue[idx].ptep = pte;
- update_debug_queue[idx].pteval = pteval;
- }
- #endif
-
- #if MMU_UPDATE_DEBUG > 1
- #undef queue_pt_switch
- #undef queue_tlb_flush
- #undef queue_invlpg
- #undef queue_pgd_pin
- #undef queue_pgd_unpin
- #undef queue_pte_pin
- #undef queue_pte_unpin
- #undef queue_set_ldt
- #endif
-
-
/*
* MULTICALL_flush_page_update_queue:
* This is a version of the flush which queues as part of a multicall.
unsigned long flags;
unsigned int _idx;
spin_lock_irqsave(&update_lock, flags);
+ idx = per_cpu(mmu_update_queue_idx, cpu);
if ( (_idx = idx) != 0 )
{
- #if MMU_UPDATE_DEBUG > 1
- if (idx > 1)
- printk("Flushing %d entries from pt update queue\n", idx);
- #endif
- #if MMU_UPDATE_DEBUG > 3
- DEBUG_allow_pt_reads();
- #endif
- idx = 0;
+ per_cpu(mmu_update_queue_idx, cpu) = 0;
wmb(); /* Make sure index is cleared first to avoid double updates. */
queue_multicall3(__HYPERVISOR_mmu_update,
- (unsigned long)update_queue,
+ (unsigned long)&per_cpu(update_queue[0], cpu),
(unsigned long)_idx,
(unsigned long)NULL);
}
static inline void __flush_page_update_queue(void)
{
- unsigned int _idx = idx;
- idx = 0;
+ int cpu = smp_processor_id();
+ unsigned int _idx = per_cpu(mmu_update_queue_idx, cpu);
- #if MMU_UPDATE_DEBUG > 1
- if (_idx > 1)
- printk("Flushing %d entries from pt update queue\n", idx);
- #endif
- #if MMU_UPDATE_DEBUG > 3
- DEBUG_allow_pt_reads();
- #endif
+ per_cpu(mmu_update_queue_idx, cpu) = 0;
wmb(); /* Make sure index is cleared first to avoid double updates. */
- if ( unlikely(HYPERVISOR_mmu_update(update_queue, _idx, NULL) < 0) )
+ if ( unlikely(HYPERVISOR_mmu_update(&per_cpu(update_queue[0], cpu), _idx, NULL) < 0) )
{
printk(KERN_ALERT "Failed to execute MMU updates.\n");
BUG();
void queue_l1_entry_update(pte_t *ptr, unsigned long val)
{
+ int cpu = smp_processor_id();
+ int idx;
unsigned long flags;
spin_lock_irqsave(&update_lock, flags);
- #if MMU_UPDATE_DEBUG > 3
- DEBUG_disallow_pt_read((unsigned long)ptr);
- #endif
- update_queue[idx].ptr = virt_to_machine(ptr);
- update_queue[idx].val = val;
+ idx = per_cpu(mmu_update_queue_idx, cpu);
+ per_cpu(update_queue[idx], cpu).ptr = virt_to_machine(ptr);
+ per_cpu(update_queue[idx], cpu).val = val;
increment_index();
spin_unlock_irqrestore(&update_lock, flags);
}
/* queue and flush versions of the above */
void xen_l1_entry_update(pte_t *ptr, unsigned long val)
{
+ int cpu = smp_processor_id();
+ int idx;
unsigned long flags;
spin_lock_irqsave(&update_lock, flags);
- #if MMU_UPDATE_DEBUG > 3
- DEBUG_disallow_pt_read((unsigned long)ptr);
- #endif
- update_queue[idx].ptr = virt_to_machine(ptr);
- update_queue[idx].val = val;
+ idx = per_cpu(mmu_update_queue_idx, cpu);
+ per_cpu(update_queue[idx], cpu).ptr = virt_to_machine(ptr);
+ per_cpu(update_queue[idx], cpu).val = val;
increment_index_and_flush();
spin_unlock_irqrestore(&update_lock, flags);
}
void xen_pte_unpin(unsigned long ptr);
void xen_set_ldt(unsigned long ptr, unsigned long bytes);
void xen_machphys_update(unsigned long mfn, unsigned long pfn);
- #define MMU_UPDATE_DEBUG 0
-
- #if MMU_UPDATE_DEBUG > 0
- typedef struct {
- void *ptr;
- unsigned long val, pteval;
- void *ptep;
- int line; char *file;
- } page_update_debug_t;
- extern page_update_debug_t update_debug_queue[];
- #define queue_l1_entry_update(_p,_v) ({ \
- update_debug_queue[mmu_update_queue_idx].ptr = (_p); \
- update_debug_queue[mmu_update_queue_idx].val = (_v); \
- update_debug_queue[mmu_update_queue_idx].line = __LINE__; \
- update_debug_queue[mmu_update_queue_idx].file = __FILE__; \
- queue_l1_entry_update((_p),(_v)); \
- })
- #define queue_l2_entry_update(_p,_v) ({ \
- update_debug_queue[mmu_update_queue_idx].ptr = (_p); \
- update_debug_queue[mmu_update_queue_idx].val = (_v); \
- update_debug_queue[mmu_update_queue_idx].line = __LINE__; \
- update_debug_queue[mmu_update_queue_idx].file = __FILE__; \
- queue_l2_entry_update((_p),(_v)); \
- })
- #endif
-
- #if MMU_UPDATE_DEBUG > 1
- #if MMU_UPDATE_DEBUG > 2
- #undef queue_l1_entry_update
- #define queue_l1_entry_update(_p,_v) ({ \
- update_debug_queue[mmu_update_queue_idx].ptr = (_p); \
- update_debug_queue[mmu_update_queue_idx].val = (_v); \
- update_debug_queue[mmu_update_queue_idx].line = __LINE__; \
- update_debug_queue[mmu_update_queue_idx].file = __FILE__; \
- printk("L1 %s %d: %p/%08lx (%08lx -> %08lx)\n", __FILE__, __LINE__, \
- (_p), virt_to_machine(_p), pte_val(*(_p)), \
- (unsigned long)(_v)); \
- queue_l1_entry_update((_p),(_v)); \
- })
- #endif
- #undef queue_l2_entry_update
- #define queue_l2_entry_update(_p,_v) ({ \
- update_debug_queue[mmu_update_queue_idx].ptr = (_p); \
- update_debug_queue[mmu_update_queue_idx].val = (_v); \
- update_debug_queue[mmu_update_queue_idx].line = __LINE__; \
- update_debug_queue[mmu_update_queue_idx].file = __FILE__; \
- printk("L2 %s %d: %p/%08lx (%08lx -> %08lx)\n", __FILE__, __LINE__, \
- (_p), virt_to_machine(_p), pmd_val(*_p), \
- (unsigned long)(_v)); \
- queue_l2_entry_update((_p),(_v)); \
- })
- #define queue_pt_switch(_p) ({ \
- printk("PTSWITCH %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_pt_switch(_p); \
- })
- #define queue_tlb_flush() ({ \
- printk("TLB FLUSH %s %d\n", __FILE__, __LINE__); \
- queue_tlb_flush(); \
- })
- #define queue_invlpg(_p) ({ \
- printk("INVLPG %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_invlpg(_p); \
- })
- #define queue_pgd_pin(_p) ({ \
- printk("PGD PIN %s %d: %08lx/%08lx\n", __FILE__, __LINE__, (_p), \
- phys_to_machine(_p)); \
- queue_pgd_pin(_p); \
- })
- #define queue_pgd_unpin(_p) ({ \
- printk("PGD UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_pgd_unpin(_p); \
- })
- #define queue_pte_pin(_p) ({ \
- printk("PTE PIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_pte_pin(_p); \
- })
- #define queue_pte_unpin(_p) ({ \
- printk("PTE UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_pte_unpin(_p); \
- })
- #define queue_set_ldt(_p,_l) ({ \
- printk("SETL LDT %s %d: %08lx %d\n", __FILE__, __LINE__, (_p), (_l)); \
- queue_set_ldt((_p), (_l)); \
- })
- #endif
void _flush_page_update_queue(void);
-static inline int flush_page_update_queue(void)
-{
- unsigned int idx = mmu_update_queue_idx;
- if ( idx != 0 ) _flush_page_update_queue();
- return idx;
-}
+#define flush_page_update_queue() do { \
+ DECLARE_PER_CPU(unsigned int, mmu_update_queue_idx); \
+ if (per_cpu(mmu_update_queue_idx, smp_processor_id())) \
+ _flush_page_update_queue(); \
+} while (0)
#define xen_flush_page_update_queue() (_flush_page_update_queue())
#define XEN_flush_page_update_queue() (_flush_page_update_queue())
void MULTICALL_flush_page_update_queue(void);
unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
struct pfn_info *page;
int rc = 0, okay = 1, i, cpu = smp_processor_id();
- unsigned int cmd;
+ unsigned int cmd, done = 0;
unsigned long prev_spfn = 0;
l1_pgentry_t *prev_spl1e = 0;
- struct domain *d = current;
+ struct exec_domain *ed = current;
+ struct domain *d = ed->domain;
u32 type_info;
perfc_incrc(calls_to_mmu_update);
perfc_addc(num_page_updates, count);
+ LOCK_BIGLOCK(d);
+
cleanup_writable_pagetable(d, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE);
- if ( unlikely(!access_ok(VERIFY_READ, ureqs, count * sizeof(req))) ) {
+ /*
+ * If we are resuming after preemption, read how much work we have already
+ * done. This allows us to set the @done output parameter correctly.
+ */
+ if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+ {
+ count &= ~MMU_UPDATE_PREEMPTED;
+ if ( unlikely(pdone != NULL) )
+ (void)get_user(done, pdone);
+ }
+
+    if ( unlikely(!array_access_ok(VERIFY_READ, ureqs, count, sizeof(req))) )
+    {
+        UNLOCK_BIGLOCK(d);
        return -EFAULT;
+    }
for ( i = 0; i < count; i++ )
{
percpu_info[cpu].foreign = NULL;
}
- if ( unlikely(success_count != NULL) )
- put_user(i, success_count);
+ /* Add incremental work we have done to the @done output parameter. */
+ if ( unlikely(pdone != NULL) )
+ __put_user(done + i, pdone);
+ UNLOCK_BIGLOCK(d);
return rc;
}
trap_info_t cur;
trap_info_t *dst = current->thread.traps;
+ LOCK_BIGLOCK(current->domain);
+
for ( ; ; )
{
- locked_hypercall_may_preempt(current->domain,
- __HYPERVISOR_set_trap_table, 1, traps);
+ if ( hypercall_preempt_check() )
+ {
+ hypercall_create_continuation(
+ __HYPERVISOR_set_trap_table, 1, traps);
+ UNLOCK_BIGLOCK(current->domain);
+ return __HYPERVISOR_set_trap_table;
+ }
if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
GET_CURRENT(%ebx)
jmp test_all_events
- ALIGN
- /*
- * HYPERVISOR_multicall(call_list, nr_calls)
- * Execute a list of 'nr_calls' hypercalls, pointed at by 'call_list'.
- * This is fairly easy except that:
- * 1. We may fault reading the call list, and must patch that up; and
- * 2. We cannot recursively call HYPERVISOR_multicall, or a malicious
- * caller could cause our stack to blow up.
- */
- #define MULTICALL_ENTRY_ORDER 5
- do_multicall:
- popl %eax
- cmpl $SYMBOL_NAME(multicall_return_from_call),%eax
- je multicall_return_from_call
- pushl %ebx
- movl 4(%esp),%ebx /* EBX == call_list */
- movl 8(%esp),%ecx /* ECX == nr_calls */
- /* Ensure the entire multicall list is below HYPERVISOR_VIRT_START. */
- movl %ecx,%eax
- shll $MULTICALL_ENTRY_ORDER,%eax
- addl %ebx,%eax /* EAX == end of multicall list */
- jc bad_multicall_address
- cmpl $__HYPERVISOR_VIRT_START,%eax
- jnc bad_multicall_address
- multicall_loop:
- pushl %ecx
- movl 4(%esp),%ecx # %ecx = struct domain
- movl EDOMAIN_processor(%ecx),%eax
- shl $6,%eax # sizeof(irq_cpustat) == 64
- testl $~0,SYMBOL_NAME(irq_stat)(%eax,1)
- jnz multicall_preempt
- multicall_fault1:
- pushl 20(%ebx) # args[4]
- multicall_fault2:
- pushl 16(%ebx) # args[3]
- multicall_fault3:
- pushl 12(%ebx) # args[2]
- multicall_fault4:
- pushl 8(%ebx) # args[1]
- multicall_fault5:
- pushl 4(%ebx) # args[0]
- multicall_fault6:
- movl (%ebx),%eax # op
- andl $(NR_hypercalls-1),%eax
- call *SYMBOL_NAME(hypercall_table)(,%eax,4)
- multicall_return_from_call:
- multicall_fault7:
- movl %eax,24(%ebx) # args[5] == result
- addl $20,%esp
- popl %ecx
- addl $(1<<MULTICALL_ENTRY_ORDER),%ebx
- loop multicall_loop
- popl %ebx
- xorl %eax,%eax
- jmp ret_from_hypercall
-
- multicall_preempt:
- # NB. remaining nr_calls is already at top of stack
- pushl %ebx # call_list
- pushl $2 # nr_args == 2
- pushl $__HYPERVISOR_multicall # op == __HYPERVISOR_multicall
- call hypercall_create_continuation
- addl $16,%esp
- popl %ebx
- movl $__HYPERVISOR_multicall,%eax
- jmp ret_from_hypercall
-
- bad_multicall_address:
- popl %ebx
- movl $-EFAULT,%eax
- jmp ret_from_hypercall
-
- .section __ex_table,"a"
- .align 4
- .long multicall_fault1, multicall_fixup1
- .long multicall_fault2, multicall_fixup2
- .long multicall_fault3, multicall_fixup3
- .long multicall_fault4, multicall_fixup4
- .long multicall_fault5, multicall_fixup5
- .long multicall_fault6, multicall_fixup6
- .long multicall_fault7, multicall_fixup6
- .previous
-
- .section .fixup,"ax"
- multicall_fixup6:
- addl $4,%esp
- multicall_fixup5:
- addl $4,%esp
- multicall_fixup4:
- addl $4,%esp
- multicall_fixup3:
- addl $4,%esp
- multicall_fixup2:
- addl $4,%esp
- multicall_fixup1:
- addl $4,%esp
- popl %ebx
- movl $-EFAULT,%eax
- jmp ret_from_hypercall
- .previous
-
ALIGN
restore_all_guest:
- testb $TF_failsafe_return,DOMAIN_thread_flags(%ebx)
+ testb $TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
jnz failsafe_callback
FLT1: movl XREGS_ds(%esp),%ds
FLT2: movl XREGS_es(%esp),%es
return 1;
}
- int fixup_seg(u16 seg, int positive_access)
+ int fixup_seg(u16 seg, unsigned long offset)
{
- struct domain *d = current;
+ struct exec_domain *d = current;
unsigned long *table, a, b, base, limit;
int ldt = !!(seg & 4);
int idx = (seg >> 3) & 8191;
struct pfn_info *page;
unsigned long i;
- if ( unlikely(!access_ok(VERIFY_WRITE, extent_list,
- nr_extents*sizeof(*extent_list))) )
- return 0;
+ if ( unlikely(!array_access_ok(VERIFY_WRITE, extent_list,
+ nr_extents, sizeof(*extent_list))) )
+ return start_extent;
- if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current) )
+ if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current->domain) )
{
DPRINTK("Only I/O-capable domains may allocate > order-0 memory.\n");
- return 0;
+ return start_extent;
}
- for ( i = 0; i < nr_extents; i++ )
+ for ( i = start_extent; i < nr_extents; i++ )
{
- hypercall_may_preempt(
- __HYPERVISOR_dom_mem_op, 5,
- MEMOP_increase_reservation,
- &extent_list[i], nr_extents-i, extent_order,
- (d == current->domain) ? DOMID_SELF : d->id);
+ PREEMPT_CHECK(MEMOP_increase_reservation);
if ( unlikely((page = alloc_domheap_pages(d, extent_order)) == NULL) )
{
return i;
}
-
- long do_dom_mem_op(unsigned int op,
- unsigned long *extent_list,
- unsigned long nr_extents,
- unsigned int extent_order,
- domid_t domid)
+
+ long
+ do_dom_mem_op(unsigned long op,
+ unsigned long *extent_list,
+ unsigned long nr_extents,
+ unsigned int extent_order,
+ domid_t domid)
{
struct domain *d;
- long rc;
+ unsigned long rc, start_extent;
+
+ /* Extract @start_extent from @op. */
+ start_extent = op >> START_EXTENT_SHIFT;
+ op &= (1 << START_EXTENT_SHIFT) - 1;
+
+ if ( unlikely(start_extent > nr_extents) ||
+ unlikely(nr_extents > (~0UL >> START_EXTENT_SHIFT)) )
+ return -EINVAL;
if ( likely(domid == DOMID_SELF) )
- d = current;
- else if ( unlikely(!IS_PRIV(current)) )
+ d = current->domain;
+ else if ( unlikely(!IS_PRIV(current->domain)) )
return -EPERM;
else if ( unlikely((d = find_domain_by_id(domid)) == NULL) )
return -ESRCH;